if (!require("grid")) install.packages("grid")
library("grid")

# this is how you create a function in R
# the general format is name_of_function <- function(variable_name, etc.) 

# this function takes a subgroup variable (or moderator) -- var1 
# and shows predicted misperceptions across low and high values 
# low = below the median value, high = above the median value
# the default is a continuous or ordinal variable, but this can be overridden by including
# a binary = TRUE command

subgroup_analysis <- function(var1, binary = FALSE) {
  
  # wave2 includes all of the people who received a treatment
  # this extracts the values for the moderator
  # wave2[,var1] where var1 is the name of the moderator variable
  # try it out for authoritarianism: wave2[,"authoritarianism"] 
  moderator <- wave2[,var1]
  
  # if the variable is binary, .5 is the value that separates high and low values
  # this is due to the fact that the median of a binary variable is meaningless
  # but if the value is .5, var1 > .5 effectively = 1, and var1 < .5 = 0

  t1 <- paste("Emotions and Misperceptions ", "(", tools::toTitleCase(var1), " > Median)", sep = "")
  t2 <- paste("Emotions and Misperceptions ", "(", tools::toTitleCase(var1), " < Median)", sep = "")
  
  if (binary == TRUE){
    value <- .5
    t1 <- paste("Emotions and Misperceptions ", "(", tools::toTitleCase(var1), " = 1)", sep = "")
    t2 <- paste("Emotions and Misperceptions ", "(", tools::toTitleCase(var1), " = 0)", sep = "")
  }
  
  # if binary is not specified, the value that separates high and low values is the median
  # 50% of values lie above and below the median so this is a natural way to split a variable
  # effectively splits the sample in half
  
  else {value <- median(moderator, na.rm = TRUE)}

  # what are the effects of anger and anxiety on local perceptions when your moderator (authoritarianism, social dominance, etc.) is greater than the median (or .5 if it is binary)?
  m1 <- lm(I(local_white - pctwhite15) ~ anger + anxiety, wave2, moderator > value)
  m2 <- lm(I(local_black - pctblack15) ~ anger + anxiety, wave2, moderator > value)
  m3 <- lm(I(local_asian - pctasian15) ~ anger + anxiety, wave2, moderator > value)
  m4 <- lm(I(local_hispanic - pcthisp15) ~ anger + anxiety, wave2, moderator > value)
  m5 <- lm(I(local_immigrant - pctimm15) ~ anger + anxiety, wave2, moderator > value)
  
  # what are the effects of anger and anxiety on national perceptions when your moderator (authoritarianism, social dominance, etc.) is greater than the median (or .5 if it is binary)?
  m6 <- lm(I(national_white - 61.5) ~ anger + anxiety, wave2, moderator > value)
  m7 <- lm(I(national_black - 12.3) ~ anger + anxiety, wave2, moderator > value)
  m8 <- lm(I(national_asian - 5.3) ~ anger + anxiety, wave2, moderator > value)
  m9 <- lm(I(national_hispanic - 17.6) ~ anger + anxiety, wave2, moderator > value)
  m10 <- lm(I(national_immigrant - 13) ~ anger + anxiety, wave2, moderator > value)
  
  # this generates predicted perceptions for those in the anger condition across outcomes
  coefs1 <- c(sum(coef(m1)[1:2]), sum(coef(m2)[1:2]), sum(coef(m3)[1:2]), sum(coef(m4)[1:2]), sum(coef(m5)[1:2]), sum(coef(m6)[1:2]), sum(coef(m7)[1:2]), sum(coef(m8)[1:2]), sum(coef(m9)[1:2]), sum(coef(m10)[1:2]))
  
  # this generates predicted perceptions for those in the anxiety condition across outcomes
  coefs2 <- c(sum(coef(m1)[c(1,3)]), sum(coef(m2)[c(1,3)]), sum(coef(m3)[c(1,3)]), sum(coef(m4)[c(1,3)]), sum(coef(m5)[c(1,3)]), sum(coef(m6)[c(1,3)]), sum(coef(m7)[c(1,3)]), sum(coef(m8)[c(1,3)]), sum(coef(m9)[c(1,3)]), sum(coef(m10)[c(1,3)]))
  
  # this generates predicted perceptions for those in the relaxation condition across outcomes
  coefs3 <- c(coef(m1)[1], coef(m2)[1], coef(m3)[1], coef(m4)[1], coef(m5)[1], coef(m6)[1], coef(m7)[1], coef(m8)[1], coef(m9)[1], coef(m10)[1])
  coefs4 <- c(coef(m1)[1], coef(m2)[1], coef(m3)[1], coef(m4)[1], coef(m5)[1], coef(m6)[1], coef(m7)[1], coef(m8)[1], coef(m9)[1], coef(m10)[1])
  
  # this generates standard errors for those in the anger condition across outcomes
  ses1 <- c(se.coef(m1)[2], se.coef(m2)[2], se.coef(m3)[2], se.coef(m4)[2], se.coef(m5)[2], se.coef(m6)[2], se.coef(m7)[2], se.coef(m8)[2], se.coef(m9)[2], se.coef(m10)[2])
  
  # this generates standard errors for those in the anxiety condition across outcomes
  ses2 <- c(se.coef(m1)[3], se.coef(m2)[3], se.coef(m3)[3], se.coef(m4)[3], se.coef(m5)[3], se.coef(m6)[3], se.coef(m7)[3], se.coef(m8)[3], se.coef(m9)[3], se.coef(m10)[3])
  
  # this is the difference between the relaxation and anger condition (e.g. average treatment effect)
  ate1 <- c(coef(m1)[2], coef(m2)[2], coef(m3)[2], coef(m4)[2], coef(m5)[2], coef(m6)[2], coef(m7)[2], coef(m8)[2], coef(m9)[2], coef(m10)[2])
  
  # this is the difference between the relaxation and anxiety condition (e.g. average treatment effect)
  ate2 <- c(coef(m1)[3], coef(m2)[3], coef(m3)[3], coef(m4)[3], coef(m5)[3], coef(m6)[3], coef(m7)[3], coef(m8)[3], coef(m9)[3], coef(m10)[3])
  
  # this calculates p-values
  # a p-value is the probability of obtaining an average treatment effect as extreme as the one observed, assuming the null hypothesis of no effect is true
  pvals <- c(summary(m1)$coefficients[[11]], summary(m2)$coefficients[[11]], summary(m3)$coefficients[[11]], summary(m4)$coefficients[[11]], summary(m5)$coefficients[[11]], summary(m6)$coefficients[[11]], summary(m7)$coefficients[[11]],summary(m8)$coefficients[[11]],summary(m9)$coefficients[[11]], summary(m10)$coefficients[[11]], summary(m1)$coefficients[[12]], summary(m2)$coefficients[[12]], summary(m3)$coefficients[[12]], summary(m4)$coefficients[[12]], summary(m5)$coefficients[[12]], summary(m6)$coefficients[[12]], summary(m7)$coefficients[[12]],summary(m8)$coefficients[[12]],summary(m9)$coefficients[[12]], summary(m10)$coefficients[[12]])
             
  # stars -- an applied statistics convention
  # focus more on magnitude though!
  
  stars <- sapply(1:20, function(x) ifelse(pvals[x] < .05, "*", "N.S."))
  
  # this creates a data frame with all of the aforementioned values
  df <- data.frame(coefs = c(coefs1, coefs2), ses = c(ses1,ses2), intercepts = c(coefs3, coefs4), ates = c(ate1, ate2))
  
  # labels for emotions
  df$emotions <- rep(c("Anger", "Anxiety"), each = 10)
  
  # labels for level
  df$level <- rep(c("Local", "National"), each = 5, length.out = 20)
  df$level <- factor(df$level)
  
  # labels for demographic groups
  df$groups <- factor(rep(c("White", "Black", "Asian", "Hispanic", "Immigrant"),  4))
  df$groups <- factor(df$groups, c("White", "Black", "Asian", "Hispanic", "Immigrant"))
  
  # appends p-values and stars for each model
  df$pvals <- pvals
  df$stars <- stars
  
  df$em_lev <- paste(df$emotions, " (", df$level, ")", sep = "")
  
  df$em_lev <- factor(df$em_lev, levels = c("Anger (Local)", "Anxiety (Local)", "Anger (National)", "Anxiety (National)"))
  
  # saves the first set of values for those who have high values on the selected moderator
  df1 <- df
  
  # this adds a column reminding you the data was generated using high values of the selected moderator
  df1$moderator <- paste("(high ", var1, ")", sep = "")
  
  # ggplot, the plotting software, likes the data to be in a specific order
  df <- df[rep(c(1,6,11,16), 5) + rep(0:4, each = 4),]
  
  # this generates 95 percent confidence intervals
  # assuming a normal distribution, 95 percent of values lie between 1.96 standard deviations of the mean
  # this calculates the confidence intervals for each estimate
  # ymax = upper end, ymin = lower end
  limits <- aes(ymax = coefs + ses*1.96, ymin= coefs - ses*1.96)
  
  # this fixes the order of the rows
  row.names(df) <- 1:20
  
  # this creates the title at the top of the relevant plot
  #t1 <- paste("Emotions and Misperceptions ", "(", var1, " = 1)", sep = "")
  
  # this plots point estimates (average misperceptions)
  # geom_pointrange plots the confidence intervals
  # theme_bw = black and white theme
  # labs = labels
  p <- ggplot(df, aes(x=groups, y=coefs, col = em_lev, shape = em_lev)) + geom_pointrange(limits, position=position_dodge(width=.5)) + theme_bw() + labs(x = "Outcomes", y = "Average Misperception", title = t1) + scale_colour_manual(name = "Condition & Outcome", labels = c("Anger (Local)", "Anxiety (Local)", "Anger (National)", "Anxiety (National)"), values = c("black", "black", "gray", "gray")) + scale_shape_manual(name = "Condition & Outcome", labels = c("Anger (Local)", "Anxiety (Local)", "Anger (National)", "Anxiety (National)"), values = c(19, 17, 19, 17))
  
  # this is really ugly code that automatically generates the relaxation intercepts
  # ggplot does not have a clean way of dealing with this :(
  intercepts <- rep(c(0, 4, 8, 12, 16), each = 2) + c(1,2)
  x1 <- rep(c(1, 2, 3, 4, 5), each = 2) + c(-.25, .25)
  x2 <- rep(c(1, 2, 3, 4, 5), each = 2)
  
  # same
  for (i in intercepts) {
    
    s <- which(i == intercepts)
    
    # relaxation intercept plots
    p <- p + geom_segment(x = x1[s], y = df$intercepts[i], 
                          xend = x2[s], yend = df$intercepts[i], col = "black")
    
  }
  
  # this converts the plot into gray scale and adjusts the title so it's in the center
  p1 <- p + theme(plot.title = element_text(hjust = 0.5)) + theme(text = element_text(size=20))
  
  # see above
  # except here we're looking at low values of the moderator
  m1 <- lm(I(local_white - pctwhite15) ~ anger + anxiety, wave2, moderator < value)
  m2 <- lm(I(local_black - pctblack15) ~ anger + anxiety, wave2, moderator < value)
  m3 <- lm(I(local_asian - pctasian15) ~ anger + anxiety, wave2, moderator < value)
  m4 <- lm(I(local_hispanic - pcthisp15) ~ anger + anxiety, wave2, moderator < value)
  m5 <- lm(I(local_immigrant - pctimm15) ~ anger + anxiety, wave2, moderator < value)
  
  m6 <- lm(I(national_white - 61.5) ~ anger + anxiety, wave2, moderator < value)
  m7 <- lm(I(national_black - 12.3) ~ anger + anxiety, wave2, moderator < value)
  m8 <- lm(I(national_asian - 5.3) ~ anger + anxiety, wave2, moderator < value)
  m9 <- lm(I(national_hispanic - 17.6) ~ anger + anxiety, wave2, moderator < value)
  m10 <- lm(I(national_immigrant - 13) ~ anger + anxiety, wave2, moderator < value)
  
  coefs1 <- c(sum(coef(m1)[1:2]), sum(coef(m2)[1:2]), sum(coef(m3)[1:2]), sum(coef(m4)[1:2]), sum(coef(m5)[1:2]), sum(coef(m6)[1:2]), sum(coef(m7)[1:2]), sum(coef(m8)[1:2]), sum(coef(m9)[1:2]), sum(coef(m10)[1:2]))
  
  coefs2 <- c(sum(coef(m1)[c(1,3)]), sum(coef(m2)[c(1,3)]), sum(coef(m3)[c(1,3)]), sum(coef(m4)[c(1,3)]), sum(coef(m5)[c(1,3)]), sum(coef(m6)[c(1,3)]), sum(coef(m7)[c(1,3)]), sum(coef(m8)[c(1,3)]), sum(coef(m9)[c(1,3)]), sum(coef(m10)[c(1,3)]))
  
  coefs3 <- c(coef(m1)[1], coef(m2)[1], coef(m3)[1], coef(m4)[1], coef(m5)[1], coef(m6)[1], coef(m7)[1], coef(m8)[1], coef(m9)[1], coef(m10)[1])
  
  coefs4 <- c(coef(m1)[1], coef(m2)[1], coef(m3)[1], coef(m4)[1], coef(m5)[1], coef(m6)[1], coef(m7)[1], coef(m8)[1], coef(m9)[1], coef(m10)[1])
  
  ses1 <- c(se.coef(m1)[2], se.coef(m2)[2], se.coef(m3)[2], se.coef(m4)[2], se.coef(m5)[2], se.coef(m6)[2], se.coef(m7)[2], se.coef(m8)[2], se.coef(m9)[2], se.coef(m10)[2])
  
  ses2 <- c(se.coef(m1)[3], se.coef(m2)[3], se.coef(m3)[3], se.coef(m4)[3], se.coef(m5)[3], se.coef(m6)[3], se.coef(m7)[3], se.coef(m8)[3], se.coef(m9)[3], se.coef(m10)[3])
  
  ate1 <- c(coef(m1)[2], coef(m2)[2], coef(m3)[2], coef(m4)[2], coef(m5)[2], coef(m6)[2], coef(m7)[2], coef(m8)[2], coef(m9)[2], coef(m10)[2])
  ate2 <- c(coef(m1)[3], coef(m2)[3], coef(m3)[3], coef(m4)[3], coef(m5)[3], coef(m6)[3], coef(m7)[3], coef(m8)[3], coef(m9)[3], coef(m10)[3])
  
  stars <- sapply(1:20, function(x) ifelse(pvals[x] < .05, "*", "N.S."))
  
  df <- data.frame(coefs = c(coefs1, coefs2), ses = c(ses1,ses2), intercepts = c(coefs3, coefs4), ates = c(ate1, ate2))
  df$emotions <- rep(c("Anger", "Anxiety"), each = 10)
  df$level <- rep(c("Local", "National"), each = 5, length.out = 20)
  df$level <- factor(df$level)
  df$groups <- factor(rep(c("White", "Black", "Asian", "Hispanic", "Immigrant"),  4))
  df$groups <- factor(df$groups, c("White", "Black", "Asian", "Hispanic", "Immigrant"))
  df$em_lev <- paste(df$emotions, " (", df$level, ")", sep = "")
  
  df$em_lev <- factor(df$em_lev, levels = c("Anger (Local)", "Anxiety (Local)", "Anger (National)", "Anxiety (National)"))
  
  pvals <- c(summary(m1)$coefficients[[11]], summary(m1)$coefficients[[12]], summary(m2)$coefficients[[11]], summary(m2)$coefficients[[12]], summary(m3)$coefficients[[11]], summary(m3)$coefficients[[12]], summary(m4)$coefficients[[11]], summary(m4)$coefficients[[12]],summary(m5)$coefficients[[11]], summary(m5)$coefficients[[12]],summary(m6)$coefficients[[11]], summary(m6)$coefficients[[12]],summary(m7)$coefficients[[11]], summary(m7)$coefficients[[12]],summary(m8)$coefficients[[11]], summary(m8)$coefficients[[12]],summary(m9)$coefficients[[11]], summary(m9)$coefficients[[12]],summary(m10)$coefficients[[11]], summary(m10)$coefficients[[12]])
  
  stars <- sapply(1:20, function(x) ifelse(pvals[x] < .05, "*", "N.S."))
  
  df$pvals <- pvals
  df$stars <- stars
  
  df2 <- df
  
  df2$moderator <- paste(var1, "= 0", sep = "")
  
  df <- df[rep(c(1,6,11,16), 5) + rep(0:4, each = 4),]
  limits <- aes(ymax = coefs + ses*1.96, ymin= coefs - ses*1.96)
  

  
  row.names(df) <- 1:20
  
  #t2 <- paste("Emotions and Misperceptions ", "(", var1, " = 0)", sep = "")
  
  p <- ggplot(df, aes(x=groups, y=coefs, col = em_lev, shape = em_lev)) + geom_pointrange(limits, position=position_dodge(width=.5)) + theme_bw() + labs(x = "Outcomes", y = "Average Misperception", title = t2) + scale_colour_manual(name = "Condition & Outcome", labels = c("Anger (Local)", "Anxiety (Local)", "Anger (National)", "Anxiety (National)"), values = c("black", "black", "gray", "gray")) + scale_shape_manual(name = "Condition & Outcome", labels = c("Anger (Local)", "Anxiety (Local)", "Anger (National)", "Anxiety (National)"), values = c(19, 17, 19, 17))
  
  intercepts <- rep(c(0, 4, 8, 12, 16), each = 2) + c(1,2)
  x1 <- rep(c(1, 2, 3, 4, 5), each = 2) + c(-.25, .25)
  x2 <- rep(c(1, 2, 3, 4, 5), each = 2)
  
  for (i in intercepts) {
    
    s <- which(i == intercepts)
    
    p <- p + geom_segment(x = x1[s], y = df$intercepts[i], 
                          xend = x2[s], yend = df$intercepts[i], col = "black")
    
  }
  
  
  p2 <- p + theme(plot.title = element_text(hjust = 0.5)) + theme(text = element_text(size=20))
  
  # this creates an empty plot
  grid.newpage()
  
  # this generates two stacked plots
  grid.draw(rbind(ggplotGrob(p1), ggplotGrob(p2), size = "last"))
  
  # when you run the function, you get plots and important estimates!
  return(rbind(df1, df2))
  

}

